Initial Setup

Set up our necessary packages. Uncomment the install.packages line the first time you run through this. Set your working directory to be the src root of this project. You’ll need to change the path in setwd() before you get started.

# install.packages(c("plyr", "readr", "ggplot2", "dplyr", "fitdistrplus", "anytime", "data.table", "knitr", "tinytex"))
library(plyr)
library(readr)
library(ggplot2)
library(dplyr)
library(fitdistrplus)
library(anytime)
library(data.table)
library(knitr)

# Machine-specific absolute path: change it to the src directory of your own checkout.
# The relative paths used when loading data (./tokenPrices, ./edgeFiles) resolve against it.
setwd('/Users/daytonpe/Dropbox/utd/6316_stat_methods_for_ds_akcora/project/src')

Load Data

Load in all of our data. The modulo of the sum of our UTD IDs was 2, so we will be using Tronix, Omisego, and YoCoin for our analysis.

# Column layout shared by the three daily price files.
price_cols <- c("Date", "Open", "High", "Low", "Close", "Volume", "MarketCap")
# Column layout shared by the three edge (transaction) files.
edge_cols <- c("fromID", "toID", "unixTime", "tokenAmount")

# First our price files.
# skip = 1 drops the first line of each file (presumably its own header row);
# the column names are supplied explicitly instead.
omg_price_df <- read.table("./tokenPrices/omisego.txt",
                           col.names = price_cols,
                           skip = 1,
                           header = FALSE)

trn_price_df <- read.table("./tokenPrices/tron",
                           col.names = price_cols,
                           skip = 1,
                           header = FALSE)

yoc_price_df <- read.table("./tokenPrices/yocoin",
                           col.names = price_cols,
                           skip = 1,
                           header = FALSE)

# Next our edge files: space-delimited with no header row.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
omg_edge_df <- read_delim("./edgeFiles/omisego.txt", delim = " ", col_names = FALSE)
trn_edge_df <- read_delim("./edgeFiles/tron.txt",    delim = " ", col_names = FALSE)
yoc_edge_df <- read_delim("./edgeFiles/yo.txt",      delim = " ", col_names = FALSE)

# and label these as well
names(omg_edge_df) <- edge_cols
names(trn_edge_df) <- edge_cols
names(yoc_edge_df) <- edge_cols

Prepare the Data

Remove Duplicates

Check for duplicated values in all of our files and remove them.

OMG

# Report duplicates in the OMG frames, drop repeated rows, then report again.
# anyDuplicated() returns the index of the first duplicated row (0 when there
# is none), not a count of duplicates.
cat("omg_price_df duplicates: ", anyDuplicated(omg_price_df), "  \n")
## omg_price_df duplicates:  0
# This check now inspects the edge frame that its label names.
# NOTE: the recorded output below came from a run that inspected omg_price_df
# here; re-run to refresh it.
cat("omg_edge_df  duplicates: ", anyDuplicated(omg_edge_df), "  \n")
## omg_edge_df  duplicates:  0
omg_price_df <- omg_price_df %>% distinct()
omg_edge_df  <- omg_edge_df  %>% distinct()
cat("omg_edge_df  duplicates: ", anyDuplicated(omg_edge_df),  "  \n") # after duplicates removed
## omg_edge_df  duplicates:  0
cat("omg_price_df duplicates: ", anyDuplicated(omg_price_df), "  \n") # after duplicates removed
## omg_price_df duplicates:  0

TRX

# Duplicate check for the TRX frames, before and after dropping repeated rows.
# anyDuplicated() yields the position of the first repeated row, or 0 if none,
# so the figures below are row indices rather than duplicate counts.
cat("trn_price_df duplicates: ", anyDuplicated(trn_price_df), "  \n")
## trn_price_df duplicates:  0
cat("trn_edge_df  duplicates: ", anyDuplicated(trn_edge_df),  "  \n")
## trn_edge_df  duplicates:  1536
trn_price_df <- distinct(trn_price_df)
trn_edge_df <- distinct(trn_edge_df)
# Same checks again, now that repeated rows are gone.
cat("trn_price_df duplicates: ", anyDuplicated(trn_price_df), "  \n")
## trn_price_df duplicates:  0
cat("trn_edge_df  duplicates: ", anyDuplicated(trn_edge_df),  "  \n")
## trn_edge_df  duplicates:  0

YOC

# Duplicate check for the YOC frames, before and after dropping repeated rows.
# anyDuplicated() yields the position of the first repeated row, or 0 if none,
# so the figures below are row indices rather than duplicate counts.
cat("yoc_price_df duplicates: ", anyDuplicated(yoc_price_df), "  \n")
## yoc_price_df duplicates:  0
cat("yoc_edge_df  duplicates: ", anyDuplicated(yoc_edge_df),  "  \n")
## yoc_edge_df  duplicates:  992
yoc_price_df <- distinct(yoc_price_df)
yoc_edge_df <- distinct(yoc_edge_df)
# Same checks again, now that repeated rows are gone.
cat("yoc_price_df duplicates: ", anyDuplicated(yoc_price_df), "  \n")
## yoc_price_df duplicates:  0
cat("yoc_edge_df  duplicates: ", anyDuplicated(yoc_edge_df),  "  \n")
## yoc_edge_df  duplicates:  0

Reformat Price File Dates

Convert the date to the correct format in the price data frames.

# Parse the textual Date column of each price frame into Date objects.
# NOTE(review): OMG is parsed with a four-digit year (%Y) while TRX and YOC use
# a two-digit year (%y) - presumably this mirrors the source files; confirm.
omg_price_df[["Date"]] <- as.Date(omg_price_df[["Date"]], format = "%m/%d/%Y")
trn_price_df[["Date"]] <- as.Date(trn_price_df[["Date"]], format = "%m/%d/%y")
yoc_price_df[["Date"]] <- as.Date(yoc_price_df[["Date"]], format = "%m/%d/%y")

Filter Impossibly Large Transactions

Set our constants for each coin, then remove edge file rows where token amount is too big to make sense. Note: all three tokens had records removed (OMG: 11, TRX: 82, YOC: 90).

# Per-token scale factors; each is multiplied by the matching supply below to
# form the upper bound that tokenAmount is compared against.
# (presumably the number of sub-units per whole token - confirm)
omg_decimals <- 10^18
trn_decimals <- 10^6
yoc_decimals <- 10^16

# Per-token supply figures, in whole tokens.
omg_supply <- 140245398
trn_supply <- 66682072191
yoc_supply <- 369659255

OMG

# Drop OMG transfers larger than the whole supply expressed in sub-units.
# The bound is inclusive, matching the filter that has always been applied to
# omg_edge_df; the reported counts now come from that same filter instead of a
# second, strict (<) one, so they describe the rows that are actually kept.
omg_max_amount <- omg_decimals * omg_supply
omg_edge_df_filtered <- omg_edge_df %>% filter(tokenAmount <= omg_max_amount)
cat("Num Rows before Filtering: ", nrow(omg_edge_df), "\n")
## Num Rows before Filtering:  1143029
cat("Num Rows after Filtering: ", nrow(omg_edge_df_filtered), "\n")
## Num Rows after Filtering:  1143018
cat("Num Rows cut: ", (nrow(omg_edge_df)-nrow(omg_edge_df_filtered)), "\n")
## Num Rows cut:  11
# Reuse the filtered frame rather than filtering a second time.
omg_edge_df <- omg_edge_df_filtered

TRN

# Drop TRX transfers larger than the whole supply expressed in sub-units.
# The bound is inclusive, matching the filter that has always been applied to
# trn_edge_df; the reported counts now come from that same filter instead of a
# second, strict (<) one, so they describe the rows that are actually kept.
trn_max_amount <- trn_decimals * trn_supply
tron_edge_df_filtered <- trn_edge_df %>% filter(tokenAmount <= trn_max_amount)
cat("Num Rows before Filtering: ", nrow(trn_edge_df), "\n")
## Num Rows before Filtering:  1512662
cat("Num Rows after Filtering: ", nrow(tron_edge_df_filtered), "\n")
## Num Rows after Filtering:  1512580
cat("Num Rows cut: ", (nrow(trn_edge_df)-nrow(tron_edge_df_filtered)), "\n")
## Num Rows cut:  82
# Reuse the filtered frame rather than filtering a second time.
trn_edge_df <- tron_edge_df_filtered

YOC

# Drop YOC transfers larger than the whole supply expressed in sub-units.
# The bound is inclusive, matching the filter that has always been applied to
# yoc_edge_df; the reported counts now come from that same filter instead of a
# second, strict (<) one, so they describe the rows that are actually kept.
# tokenAmount is referenced as a column rather than through yoc_edge_df$.
yoc_max_amount <- yoc_decimals * yoc_supply
yocoin_edge_df_filtered <- yoc_edge_df %>% filter(tokenAmount <= yoc_max_amount)
cat("Num Rows before Filtering: ", nrow(yoc_edge_df), "\n")
## Num Rows before Filtering:  595582
cat("Num Rows after Filtering: ", nrow(yocoin_edge_df_filtered), "\n")
## Num Rows after Filtering:  595492
cat("Num Rows cut: ", (nrow(yoc_edge_df)-nrow(yocoin_edge_df_filtered)), "\n")
## Num Rows cut:  90
# Reuse the filtered frame rather than filtering a second time.
yoc_edge_df <- yocoin_edge_df_filtered

Reformat Edge File Dates

Update the edge data frame dates to be the correct format.

# Derive a calendar Date column from each edge frame's unixTime column.
# NOTE(review): anydate() is called with its default timezone handling, so the
# day boundary may depend on the machine's settings - confirm it matches the
# price files.
omg_edge_df[["Date"]] <- anydate(omg_edge_df[["unixTime"]])
trn_edge_df[["Date"]] <- anydate(trn_edge_df[["unixTime"]])
yoc_edge_df[["Date"]] <- anydate(yoc_edge_df[["unixTime"]])

Feature Engineering

Determine some extra features on which we can fit our multiple linear regressions.

Determine Buys/Sells by Top Buyers/Sellers

Calculate the number of buys and sells by user_id. A great description of n() is here: https://stackoverflow.com/questions/25869378/what-does-n-n-mean-in-r

# Transactions received per address (toID); the count lands in column n.
omg_buys <- count(omg_edge_df, toID)
trn_buys <- count(trn_edge_df, toID)
yoc_buys <- count(yoc_edge_df, toID)

# Transactions sent per address (fromID); the count lands in column n.
omg_sells <- count(omg_edge_df, fromID)
trn_sells <- count(trn_edge_df, fromID)
yoc_sells <- count(yoc_edge_df, fromID)

Filter by Top-K Buyers and Build Features

Filter to only include top K buyers and build a dataframe with the summarized data for fitting a regression model. Features we create here include: - Avg_Tok_Amt: Average Token Amount traded for the top-K users on the given day - Tot_Tok_Amt: Total Token Amount traded by the top-K users on the given day - Transactions: Number of transactions by the top-K users on the given day - Distinct Buyers: Distinct number of buyers for a given day - Distinct Sellers: Distinct number of sellers for a given day

# How many of the most active buyers to keep for each token.
K_omg <- 104
K_trn <- 18000
K_yoc <- 136

# Rank buyers by transaction count, highest first, and keep the first K rows.
omg_buys <- head(arrange(omg_buys, -n), K_omg)
trn_buys <- head(arrange(trn_buys, -n), K_trn)
yoc_buys <- head(arrange(yoc_buys, -n), K_yoc)

# Keep only the edges whose recipient is one of the retained buyers.
omg_top_k_buys <- filter(omg_edge_df, toID %in% omg_buys$toID)
trn_top_k_buys <- filter(trn_edge_df, toID %in% trn_buys$toID)
yoc_top_k_buys <- filter(yoc_edge_df, toID %in% yoc_buys$toID)


# Create a dataframe with summarized data for fitting a regression model
# Collapse a frame of edges into one row per Date with the regression features:
# mean and total tokenAmount, number of rows, and distinct toID / fromID counts.
summarise_daily_activity <- function(edges) {
  per_day <- group_by(edges, Date)
  daily <- summarise(
    per_day,
    Avg_Tok_Amt = mean(tokenAmount),
    Tot_Tok_Amt = sum(tokenAmount),
    Transactions = n(),
    Distinct_Buyers = n_distinct(toID),
    Distinct_Sellers = n_distinct(fromID)
  )
  ungroup(daily)
}

# One summarized frame per token, built from its top-K buyer edges.
omg_fit_data <- summarise_daily_activity(omg_top_k_buys)
trn_fit_data <- summarise_daily_activity(trn_top_k_buys)
yoc_fit_data <- summarise_daily_activity(yoc_top_k_buys)

Join Tables

Join edge data to pricing data based on the Date. We lose a small percentage of the data here due to the fact that the timeframes for the two data files do not match perfectly.

# Inner-join the daily features with the price rows on Date; dates present in
# only one of the two frames are dropped (all = FALSE is merge()'s default).
omg_fit_data <- merge(x = omg_fit_data, y = omg_price_df, by = "Date", all = FALSE)
trn_fit_data <- merge(x = trn_fit_data, y = trn_price_df, by = "Date", all = FALSE)
yoc_fit_data <- merge(x = yoc_fit_data, y = yoc_price_df, by = "Date", all = FALSE)

Add Historic Data to Data Frame

Calculate the close values of the previous 3 days. Note: m1 refers to minus 1, i.e. one day previous

# Append Close lagged by 1, 2 and 3 rows as Close_m1..Close_m3. shift() with a
# vector n returns one lagged vector per offset; the leading rows are NA.
# NOTE(review): the lag is by row, so it equals "days" only if the merged frame
# has no missing dates - confirm.
omg_fit_data[c("Close_m1", "Close_m2", "Close_m3")] <- shift(omg_fit_data$Close, n = 1:3)

trn_fit_data[c("Close_m1", "Close_m2", "Close_m3")] <- shift(trn_fit_data$Close, n = 1:3)

yoc_fit_data[c("Close_m1", "Close_m2", "Close_m3")] <- shift(yoc_fit_data$Close, n = 1:3)

Inspecting the Data

Let’s take a look at our data with our newly engineered features on which we will fit our multiple regression model.

# Auto-print each modelling frame (daily top-K features, price columns and the
# three lagged Close columns) for visual inspection.
omg_fit_data
trn_fit_data
yoc_fit_data

Let’s also take a look at how many days are tracked for the three tokens in our data sets. We have the most data for YOC.

# Number of dates (rows) available for modelling, per token.
cat("OMG Rows: ", nrow(omg_fit_data), "\n", sep = " ")
## OMG Rows:  297
cat("TRX Rows: ", nrow(trn_fit_data), "\n", sep = " ")
## TRX Rows:  236
cat("YOC Rows: ", nrow(yoc_fit_data), "\n", sep = " ")
## YOC Rows:  422

Correlation of Regressors

We chose to regress to the Close value of the token, so we will compare the correlation of each of the regressors (Xs) to the Close (Y).

We can make the observation from this data that the previous day’s prices are far more correlated to the Close price on the day when compared to the token amounts, distinct buyers, and other features we engineered. This is expected.

OMG

# Pearson correlation of each OMG regressor with Close. The lagged columns
# start with NA rows, so those use only the complete pairs.
cat("Transactions:         ", with(omg_fit_data, cor(Close, Transactions)), "\n")
## Transactions:          0.3133622
cat("Total Token Amount:  ", with(omg_fit_data, cor(Close, Tot_Tok_Amt)), "\n")
## Total Token Amount:   -0.2566245
cat("Average Token Amount:", with(omg_fit_data, cor(Close, Avg_Tok_Amt)), "\n")
## Average Token Amount: -0.4220751
cat("Distinct Buyers:      ", with(omg_fit_data, cor(Close, Distinct_Buyers)), "\n")
## Distinct Buyers:       0.6134334
cat("Distinct Sellers:     ", with(omg_fit_data, cor(Close, Distinct_Sellers)), "\n")
## Distinct Sellers:      0.2075846
cat("Close Minus 1:        ", with(omg_fit_data, cor(Close, Close_m1, use = "complete.obs")), "\n")
## Close Minus 1:         0.9786079
cat("Close Minus 2:        ", with(omg_fit_data, cor(Close, Close_m2, use = "complete.obs")), "\n")
## Close Minus 2:         0.9591295
cat("Close Minus 3:        ", with(omg_fit_data, cor(Close, Close_m3, use = "complete.obs")), "\n")
## Close Minus 3:         0.9387131

TRX

# Pearson correlation of each TRX regressor with Close. The lagged columns
# start with NA rows, so those use only the complete pairs.
cat("Transactions:         ", with(trn_fit_data, cor(Close, Transactions)), "\n")
## Transactions:          0.5137099
cat("Total Token Amount:   ", with(trn_fit_data, cor(Close, Tot_Tok_Amt)), "\n")
## Total Token Amount:    0.194449
cat("Average Token Amount:", with(trn_fit_data, cor(Close, Avg_Tok_Amt)), "\n")
## Average Token Amount: -0.1125508
cat("Distinct Buyers:      ", with(trn_fit_data, cor(Close, Distinct_Buyers)), "\n")
## Distinct Buyers:       0.8720125
cat("Distinct Sellers:     ", with(trn_fit_data, cor(Close, Distinct_Sellers)), "\n")
## Distinct Sellers:      0.243085
cat("Close Minus 1:        ", with(trn_fit_data, cor(Close, Close_m1, use = "complete.obs")), "\n")
## Close Minus 1:         0.9615547
cat("Close Minus 2:        ", with(trn_fit_data, cor(Close, Close_m2, use = "complete.obs")), "\n")
## Close Minus 2:         0.9165609
cat("Close Minus 3:        ", with(trn_fit_data, cor(Close, Close_m3, use = "complete.obs")), "\n")
## Close Minus 3:         0.86749

YOC

# Pearson correlation of each YOC regressor with Close. The lagged columns
# start with NA rows, so those use only the complete pairs.
cat("Transactions:        ", with(yoc_fit_data, cor(Close, Transactions)), "\n")
## Transactions:         -0.03078097
cat("Total Token Amount:  ", with(yoc_fit_data, cor(Close, Tot_Tok_Amt)), "\n")
## Total Token Amount:   -0.2928949
cat("Average Token Amount:", with(yoc_fit_data, cor(Close, Avg_Tok_Amt)), "\n")
## Average Token Amount: -0.2878166
cat("Distinct Buyers:      ", with(yoc_fit_data, cor(Close, Distinct_Buyers)), "\n")
## Distinct Buyers:       0.3815441
cat("Distinct Sellers:     ", with(yoc_fit_data, cor(Close, Distinct_Sellers)), "\n")
## Distinct Sellers:      0.05851667
cat("Close Minus 1:        ", with(yoc_fit_data, cor(Close, Close_m1, use = "complete.obs")), "\n")
## Close Minus 1:         0.9806905
cat("Close Minus 2:        ", with(yoc_fit_data, cor(Close, Close_m2, use = "complete.obs")), "\n")
## Close Minus 2:         0.9751215
cat("Close Minus 3:        ", with(yoc_fit_data, cor(Close, Close_m3, use = "complete.obs")), "\n")
## Close Minus 3:         0.9702466

Create the Multiple Linear Regression Model

Time to actually perform the fit via multiple linear regression. We will split each coin into two different models. The first considering the previous 3 days of Close prices. The second only focusing on the features we engineered. As the previous three days were so highly correlated with the price, they make the R^2 value significantly higher and we lose some understanding of which one of the engineered features actually contributes the most.

Note: The models ending in "_hist" take the price history for the three previous days into account. Those with "_no_hist" endings do not.

# Two linear models of Close per token:
#   *_hist    - engineered features plus the three lagged closes
#               (the leading rows with NA lags are dropped by lm)
#   *_no_hist - engineered features only
# Formulas are written inline so the Call printed by summary() is unchanged.

# OMG
omg_fit_hist <- lm(
  formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions +
    Distinct_Buyers + Distinct_Sellers + Close_m1 + Close_m2 + Close_m3,
  data = omg_fit_data
)

omg_fit_no_hist <- lm(
  formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions +
    Distinct_Buyers + Distinct_Sellers,
  data = omg_fit_data
)

# TRX
trn_fit_hist <- lm(
  formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions +
    Distinct_Buyers + Distinct_Sellers + Close_m1 + Close_m2 + Close_m3,
  data = trn_fit_data
)

trn_fit_no_hist <- lm(
  formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions +
    Distinct_Buyers + Distinct_Sellers,
  data = trn_fit_data
)

# YOC
yoc_fit_hist <- lm(
  formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions +
    Distinct_Buyers + Distinct_Sellers + Close_m1 + Close_m2 + Close_m3,
  data = yoc_fit_data
)

yoc_fit_no_hist <- lm(
  formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions +
    Distinct_Buyers + Distinct_Sellers,
  data = yoc_fit_data
)

OMG Summary Data and Plots for Model with Close History

# Print the coefficient table and fit statistics for the OMG model with lags.
omg_fit_hist %>% summary() %>% print()
## 
## Call:
## lm(formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions + 
##     Distinct_Buyers + Distinct_Sellers + Close_m1 + Close_m2 + 
##     Close_m3, data = omg_fit_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8643 -0.4799 -0.0258  0.5055  4.9258 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -7.347e-02  2.614e-01  -0.281   0.7788    
## Avg_Tok_Amt      -6.106e-23  5.994e-23  -1.019   0.3092    
## Tot_Tok_Amt       1.926e-25  8.781e-26   2.193   0.0291 *  
## Transactions      1.266e-04  8.702e-05   1.455   0.1467    
## Distinct_Buyers   1.225e-02  7.139e-03   1.716   0.0872 .  
## Distinct_Sellers  7.825e-05  1.405e-04   0.557   0.5780    
## Close_m1          9.173e-01  5.791e-02  15.842   <2e-16 ***
## Close_m2          3.376e-02  7.878e-02   0.429   0.6686    
## Close_m3         -2.060e-03  5.695e-02  -0.036   0.9712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.056 on 285 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:   0.96,  Adjusted R-squared:  0.9589 
## F-statistic:   855 on 8 and 285 DF,  p-value: < 2.2e-16
# Draw the default lm diagnostic plots for the OMG model with lags.
plot(x = omg_fit_hist)

OMG Summary Data and Plots for Model without Close History

# Print the coefficient table and fit statistics for the OMG model without lags.
omg_fit_no_hist %>% summary() %>% print()
## 
## Call:
## lm(formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions + 
##     Distinct_Buyers + Distinct_Sellers, data = omg_fit_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2808 -3.2030 -0.2738  2.4096 11.4253 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       3.341e+00  8.543e-01   3.911 0.000115 ***
## Avg_Tok_Amt      -2.385e-22  1.331e-22  -1.791 0.074285 .  
## Tot_Tok_Amt      -1.296e-25  2.897e-25  -0.447 0.654941    
## Transactions      1.448e-03  3.159e-04   4.585 6.75e-06 ***
## Distinct_Buyers   2.335e-01  2.283e-02  10.227  < 2e-16 ***
## Distinct_Sellers -1.334e-03  5.184e-04  -2.573 0.010573 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.99 on 291 degrees of freedom
## Multiple R-squared:  0.4418, Adjusted R-squared:  0.4322 
## F-statistic: 46.06 on 5 and 291 DF,  p-value: < 2.2e-16
# Draw the default lm diagnostic plots for the OMG model without lags.
plot(x = omg_fit_no_hist)

TRX Summary Data and Plots for Model with Close History

# Print the coefficient table and fit statistics for the TRX model with lags.
trn_fit_hist %>% summary() %>% print()
## 
## Call:
## lm(formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions + 
##     Distinct_Buyers + Distinct_Sellers + Close_m1 + Close_m2 + 
##     Close_m3, data = trn_fit_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.030480 -0.002582  0.000472  0.001864  0.066202 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.382e-03  8.301e-04  -1.665  0.09741 .  
## Avg_Tok_Amt       1.331e-18  3.627e-17   0.037  0.97075    
## Tot_Tok_Amt       1.182e-19  1.597e-19   0.740  0.45994    
## Transactions     -3.004e-06  1.580e-06  -1.901  0.05861 .  
## Distinct_Buyers   3.328e-05  5.320e-06   6.256 1.98e-09 ***
## Distinct_Sellers  2.639e-06  1.730e-06   1.526  0.12846    
## Close_m1          8.077e-01  5.785e-02  13.962  < 2e-16 ***
## Close_m2          4.101e-02  7.806e-02   0.525  0.59987    
## Close_m3         -1.467e-01  5.462e-02  -2.685  0.00779 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008241 on 224 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.9525, Adjusted R-squared:  0.9508 
## F-statistic: 561.2 on 8 and 224 DF,  p-value: < 2.2e-16
# Draw the default lm diagnostic plots for the TRX model with lags.
plot(x = trn_fit_hist)

TRX Summary Data and Plots for Models without Close History

# Print the coefficient table and fit statistics for the TRX model without lags.
trn_fit_no_hist %>% summary() %>% print()
## 
## Call:
## lm(formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions + 
##     Distinct_Buyers + Distinct_Sellers, data = trn_fit_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.080304 -0.007163 -0.001766  0.003777  0.068697 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.037e-03  1.617e-03  -0.641    0.522    
## Avg_Tok_Amt       1.178e-17  7.153e-17   0.165    0.869    
## Tot_Tok_Amt      -4.220e-19  3.077e-19  -1.372    0.172    
## Transactions     -1.989e-05  2.809e-06  -7.081 1.72e-11 ***
## Distinct_Buyers   1.264e-04  7.336e-06  17.229  < 2e-16 ***
## Distinct_Sellers  2.007e-05  3.113e-06   6.449 6.57e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01628 on 230 degrees of freedom
## Multiple R-squared:  0.8115, Adjusted R-squared:  0.8074 
## F-statistic: 198.1 on 5 and 230 DF,  p-value: < 2.2e-16
# Draw the default lm diagnostic plots for the TRX model without lags.
plot(x = trn_fit_no_hist)

YOC Summary Data and Plots for Model with Close History

# Print the coefficient table and fit statistics for the YOC model with lags.
yoc_fit_hist %>% summary() %>% print()
## 
## Call:
## lm(formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions + 
##     Distinct_Buyers + Distinct_Sellers + Close_m1 + Close_m2 + 
##     Close_m3, data = yoc_fit_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.052084 -0.001512 -0.000703  0.001339  0.044489 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.171e-03  7.241e-04   1.617 0.106554    
## Avg_Tok_Amt      -2.921e-25  5.028e-25  -0.581 0.561621    
## Tot_Tok_Amt      -4.565e-30  4.740e-27  -0.001 0.999232    
## Transactions     -9.850e-07  9.762e-07  -1.009 0.313549    
## Distinct_Buyers  -1.245e-05  2.951e-05  -0.422 0.673353    
## Distinct_Sellers  2.688e-06  5.304e-06   0.507 0.612531    
## Close_m1          5.855e-01  5.028e-02  11.644  < 2e-16 ***
## Close_m2          2.244e-01  5.732e-02   3.915 0.000106 ***
## Close_m3          1.595e-01  5.007e-02   3.185 0.001558 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007628 on 410 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.9661, Adjusted R-squared:  0.9654 
## F-statistic:  1461 on 8 and 410 DF,  p-value: < 2.2e-16
# Draw the default lm diagnostic plots for the YOC model with lags.
plot(x = yoc_fit_hist)

YOC Summary Data and Plots for Models without Close History

# Print the coefficient table and fit statistics for the YOC model without lags.
yoc_fit_no_hist %>% summary() %>% print()
## 
## Call:
## lm(formula = Close ~ Avg_Tok_Amt + Tot_Tok_Amt + Transactions + 
##     Distinct_Buyers + Distinct_Sellers, data = yoc_fit_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.064999 -0.026200 -0.005130  0.007418  0.138106 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       3.553e-02  3.040e-03  11.688  < 2e-16 ***
## Avg_Tok_Amt      -5.981e-24  2.411e-24  -2.481   0.0135 *  
## Tot_Tok_Amt      -4.511e-26  2.278e-26  -1.981   0.0483 *  
## Transactions     -2.628e-05  4.527e-06  -5.806 1.27e-08 ***
## Distinct_Buyers   1.118e-03  1.278e-04   8.749  < 2e-16 ***
## Distinct_Sellers  4.069e-05  2.521e-05   1.614   0.1072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03683 on 416 degrees of freedom
## Multiple R-squared:  0.2544, Adjusted R-squared:  0.2454 
## F-statistic: 28.39 on 5 and 416 DF,  p-value: < 2.2e-16
# Draw the default lm diagnostic plots for the YOC model without lags.
plot(x = yoc_fit_no_hist)

Conclusion

We find that including the last three days of close prices really overpowers any gains we make via our engineered regressors. All three models give us values over .95 for R^2, which is great! Unfortunately, this approach will not be able to predict quick spikes or drops in the price, as it simply estimates a linear trajectory based on the previous days’ action.

If we disregard the previous days’ close prices, we are able to get the following R^2 values after [manually] experimenting with K values representing the top K buyers. - OMG: 0.4418 (K=104) - TRN: ~0.8115 (K=~18,000) - YOC: 0.2544 (K=136)

Note that TRN’s K value which produced the highest R^2 Value was exceptionally high compared to OMG and YOC. We plan to explore why this was the case in our writeup.